# Gather the token count of every non-blank line in the words file.
# str.split() with no arguments splits on runs of whitespace and ignores
# leading/trailing whitespace, so a whitespace-only line yields an empty
# list and is filtered out below.
with open('all.words.txt', 'r', encoding='gbk') as words_file:
    token_lists = (row.split() for row in words_file)
    li_len = [len(tokens) for tokens in token_lists if tokens]
print(li_len)
import numpy as np

# Summary statistics over the sentence lengths.
# Assumes li_len is the list of per-sentence lengths built earlier in this
# script; it is converted to an array once and reused for every statistic.
lengths = np.asarray(li_len)
median = np.median(lengths)  # 50th percentile
percentile_75 = np.percentile(lengths, 75)  # 75th percentile
percentile_90 = np.percentile(lengths, 90)  # 90th percentile

# One print call, one statistic per output line.
print(
    f"中位数: {median}",
    f"75分位数: {percentile_75}",
    f"90分位数: {percentile_90}",
    sep="\n",
)

# Tally how many lines of the labels file are exactly "NEG" or "POS"
# (after trimming surrounding whitespace). Blank lines and any other label
# are not keys of the tally dict, so they are skipped.
label_counts = {"NEG": 0, "POS": 0}
with open('all.labels.txt', 'r', encoding='gbk') as labels_file:
    for raw_label in labels_file:
        label = raw_label.strip()
        if label in label_counts:
            label_counts[label] += 1

NEG = label_counts["NEG"]
POS = label_counts["POS"]
print(NEG, POS)